In [2]:
import pandas as pd, numpy as np
import kendo_romania

Read data


In [3]:
matches={i:{} for i in range(1993,2019)}
Import data

2018

CR


In [4]:
filename='rawdata/2018/CR/CR25 - Public.xlsx'
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10,
             'shinpan':{'fukushin1':16,'shushin':17,'fukushin2':18}}
matches[2018]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3)

SL


In [5]:
filename='rawdata/2018/SL/Prezenta SL_WKC17.xlsx'
sheetname=['F','M']
matches[2018]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,5)

2017

CN


In [6]:
categories=['Individual masculin','Echipe']
filename=['rawdata/2017/CN/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10,
             'shinpan':{'fukushin1':16,'shushin':17,'fukushin2':18}}
shift=0
matches[2017]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [7]:
categories=['Individual juniori mici','Individual juniori mari','Individual feminin']
filename=['rawdata/2017/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2017]['CN']=matches[2017]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

CR


In [8]:
categories=['Individual masculin']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10}
shift=2
matches[2017]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [9]:
categories=['Individual juniori','Individual veterani','Individual feminin']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
shift=-1
matches[2017]['CR']=matches[2017]['CR']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [10]:
categories=['Echipe']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
shift=0
matches[2017]['CR']=matches[2017]['CR']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

SL


In [11]:
filename='rawdata/2017/SL/Prezenta.xlsx'
sheetname=['F','M','J']
matches[2017]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

2016

SL


In [12]:
filename='rawdata/2016/SL/Event management - stagiul 4.xlsx'
sheetname=['F','M']
matches[2016]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

In [13]:
sheetname=['J']
matches[2016]['SL']=matches[2016]['SL']+\
        kendo_romania.get_matches_from_table(filename,sheetname,5)

CN


In [14]:
categories=['Individual masculin']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10}
shift=2
matches[2016]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [15]:
categories=['Individual feminin']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2016]['CN']=matches[2016]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [16]:
categories=['Echipe','Male team']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=0
matches[2016]['CN']=matches[2016]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [17]:
categories=['Junior 1 individual','Junior 2 individual']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2016]['CN']=matches[2016]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

CR


In [18]:
filename='rawdata/2016/CR/Event management_CR23.2016.xlsx'
sheetname=['IF_m','IJ_m','IM_m','IS_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2016]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [19]:
sheetname=['EJ_m','ES_m']
matches[2016]['CR']=matches[2016]['CR']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

2015

SL


In [20]:
filename='rawdata/2015/SL/Event management - stagiul 5.xlsx'
sheetname=['SF_s','SM_s']
matches[2015]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

CN


In [21]:
filename='rawdata/2015/CN/Event management_CN22.2015.xlsx'
sheetname=['IF_m','IJ2_m','IM_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2015]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [22]:
sheetname='E_m'
matches[2015]['CN']=matches[2015]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

CR


In [23]:
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IF_m','IS_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2015]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [24]:
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IJ1_s']
matches[2015]['CR']=matches[2015]['CR']+\
                kendo_romania.get_matches_from_table(filename,
                    sheetname,skiprows=7,shift=1,nrows=9)

In [25]:
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IJ2_s']
matches[2015]['CR']=matches[2015]['CR']+\
                kendo_romania.get_matches_from_table(filename,
                    sheetname,skiprows=8,shift=12,nrows=8)
matches[2015]['CR']=matches[2015]['CR']+\
                kendo_romania.get_matches_from_table(filename,
                    sheetname,skiprows=16,shift=12,nrows=8)

In [26]:
sheetname=['IM_s']
column_keys={'match_type':19,'aka':{'name':20,'point1':21},
             'shiro':{'name':24,'point1':23},'outcome':22}
shift=0
matches[2015]['CR']=matches[2015]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=10
matches[2015]['CR']=matches[2015]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)

2014

SL


In [27]:
filename='rawdata/2014/SL/Lista de participanti 6.xlsx'
sheetname=['SF_s','SM_s','J_s']
matches[2014]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

CR


In [28]:
filename='rawdata/2014/CR/Event management_CR21.2014.xlsx'
sheetname=['IC-10_m','IC_m','IJ_m','IS_m','IF_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2014]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [29]:
sheetname=['IM_s']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=8
matches[2014]['CR']=matches[2014]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,8,shift=shift)

CN


In [30]:
filename='rawdata/2014/CN/Event management_CN21.2014 - v2.xlsx'
sheetname=['IF_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2014]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [31]:
sheetname=['IM_s']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=19
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=29
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)

In [32]:
sheetname=['IJ1_s']
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_table(filename,sheetname,7,shift=1,nrows=10)

In [33]:
sheetname=['IJ2_s']
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_table(filename,sheetname,8,shift=12,nrows=6)
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_table(filename,sheetname,14,shift=12,nrows=6)
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_table(filename,sheetname,20,shift=12,nrows=6)

2013

CN


In [34]:
filename='rawdata/2013/CN/Event management_CN2013.xlsx'
sheetname=['IS_m','IF_m','IC_m','IJ_m','E_m','IM_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2013]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

CR


In [35]:
filename='rawdata/2013/CR/Event management_CR2013.xlsx'
sheetname=['IF_meciuri','IJ_meciuri','IM_meciuri']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2013]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

SL


In [36]:
filename='rawdata/2013/SL/Event management.xlsx'
sheetname=['E_meciuri']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2013]['SL']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [37]:
sheetname=['Schema feminin']
matches[2013]['SL']=matches[2013]['SL']+\
                kendo_romania.get_matches_from_table(filename,sheetname,2,nrows=14)
sheetname=['Schema juniori']
matches[2013]['SL']=matches[2013]['SL']+\
                kendo_romania.get_matches_from_table(filename,sheetname,2,nrows=12)

2012

CN


In [38]:
filename='rawdata/2012/CN/Event management CN2012.xlsx'
sheetname=['E_meciuri','IJ_meciuri','IF_meciuri','IM_meciuri']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2012]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

CR


In [39]:
filename='rawdata/2012/CR/2012.05.05-06 - CR - Cluj.xlsx'
sheetname=['IC']
matches[2012]['CR']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,12,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=4)

In [40]:
sheetname=['IJ']
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,14,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,19,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,24,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,30,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,35,shift=1,nrows=3)

In [41]:
sheetname=['IF']
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
             'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,22,shift=shift)

In [42]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=6
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [43]:
sheetname=['ES']
column_keys={'match_type':20,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
shift=4
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
shift=9
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

2011

CN


In [44]:
filename='rawdata/2011/CN/2011.11.26-27 - CN - Bucuresti_print.xlsx'
sheetname=['IJ']
matches[2011]['CN']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,23,shift=1)

In [45]:
sheetname=['IF']
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,23,shift=1,nrows=4)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
             'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,28,shift=shift)

In [46]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [47]:
sheetname=['E']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=17
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,5,shift=shift)
shift=23
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,5,shift=shift)
shift=29
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,5,shift=shift)

CR


In [48]:
filename='rawdata/2011/CR/2011.04.16-17 - CR - Miercurea Ciuc.xlsx'
sheetname=['ES']
column_keys={'match_type':6,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2011]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=5
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=11
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)

In [49]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [50]:
sheetname=['IF']
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,15,shift=1,nrows=4)
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,21,shift=1,nrows=4)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
             'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,26,shift=shift)

In [51]:
sheetname=['IJ']
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,16,shift=1,nrows=3)
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,21,shift=1,nrows=4)
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,27,shift=1,nrows=3)

In [52]:
sheetname=['IC']
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,4,shift=0,nrows=4)

In [53]:
sheetname=['EJ']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=0
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,15,shift=shift)

2010

CR


In [54]:
filename='rawdata/2010/CR/2010.03.27-28 - CR - Budeasa.xlsx'
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2010]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [55]:
sheetname=['IF']
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,15,shift=1,nrows=4)
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,21,shift=1,nrows=4)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
             'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,26,shift=shift)

In [56]:
sheetname=['EJ']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=0
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,15,shift=shift)

In [57]:
sheetname=['IJ']
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,16,shift=1,nrows=3)
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,21,shift=1,nrows=4)
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,27,shift=1,nrows=3)

In [58]:
sheetname=['IC']
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,4,shift=0,nrows=4)

CN


In [59]:
filename='rawdata/2010/CN/2010.11.27-28 - CN - Bucuresti.xlsx'
sheetname=['IJ']
matches[2010]['CN']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,point_shift=0,nrows=5)

In [60]:
sheetname=['IC']
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)

In [61]:
sheetname=['IF']
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)

In [62]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=6
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
shift=12
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [63]:
sheetname=['E']
column_keys={'match_type':15,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,5,shift=shift)
shift=5
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,5,shift=shift)
shift=11
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,5,shift=shift)

2009

CN


In [64]:
filename='rawdata/2009/CN/2009.11.28-29 - CN - Bucuresti.xlsx'
sheetname=['IJ']
matches[2009]['CN']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,4,shift=0,nrows=4)

In [65]:
sheetname=['IF']
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,12,shift=1,point_shift=0,nrows=5)

In [66]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [67]:
sheetname=['ES']
column_keys={'match_type':1,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=5
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=11
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)

CR


In [68]:
filename='rawdata/2009/CR/2009.04.04 - CR - Budeasa - print.xlsx'
sheetname=['IJ']
matches[2009]['CR']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,12,shift=1,point_shift=0,nrows=5)

In [69]:
sheetname=['IF']
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,point_shift=0,nrows=6)

In [70]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [71]:
sheetname=['ES']
column_keys={'match_type':1,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,8,shift=shift)
shift=5
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,8,shift=shift)

Clean up points, matches, player names


In [72]:
def match_cleaner(year,match):
    kind,phase='Unknown','Unknown'
    if '#' in match:
        stage0=match.split('#')[0].lower()
        stage1=match.split('#')[1]
        if 'pool' in stage1: 
            phase='Pool'
        if 'Pool' in stage1: 
            phase='Pool'
        elif 'prel' in stage1: 
            phase='Prelim.'
        elif 'Prel' in stage1: 
            phase='Prelim.'
        elif 'layoff' in stage1: 
            phase='Prelim.'
        elif '- F' in stage1: 
            phase='Finals'
        elif 'F -' in stage1: 
            phase='Finals'
        elif 'Final' in stage1: 
            phase='Finals'
        elif 'SF' in stage1: 
            phase='Finals'
        elif 'QF' in stage1: 
            phase='Finals'
        elif 'A'==stage1: phase='Pool'
        elif 'B'==stage1: phase='Pool'
        elif 'C'==stage1: phase='Pool'
        elif 'D'==stage1: phase='Pool'
        elif 'E'==stage1: phase='Pool'
        elif 'F'==stage1: phase='Pool'
        elif 'G'==stage1: phase='Pool'
        elif 'H'==stage1: phase='Pool'
        elif 'I'==stage1: phase='Pool'
        elif 'J'==stage1: phase='Pool'
        elif 'K'==stage1: phase='Pool'
        elif 'L'==stage1: phase='Pool'
        elif 'M'==stage1: phase='Pool'
        elif 'N'==stage1: phase='Pool'
        elif 'O'==stage1: phase='Pool'
        elif 'P'==stage1: phase='Pool'
        elif 'Q'==stage1: phase='Pool'
        elif 'R'==stage1: phase='Pool'
        elif 'S'==stage1: phase='Pool'
        elif 'T'==stage1: phase='Pool'
        
        if 'IS' in stage1:
            kind="Senior's Individual"
        elif 'IF' in stage1:
            kind="Women's Individual"
        elif 'IM' in stage1:
            kind="Men's Individual"
        elif 'IC' in stage1:
            kind="Children's Individual"
        elif 'IJ' in stage1:
            kind="Junior's Individual"
        elif 'EJ' in stage1:
            kind="Junior's Team"
        elif 'EF' in stage1:
            kind="Men's Team"
        elif 'ES' in stage1:
            kind="Senior's Team"
            
        if 'individual masculin.' in stage0:
            kind="Men's Individual"
        if 'echipe.' in stage0:
            kind="Mixed Team"
        if 'individual juniori' in stage0:
            kind="Junior's Team"
        if 'individual feminin' in stage0:
            kind="Junior's Team"
        if 'individual veterani' in stage0:
            kind="Senior's Team"
        if 'male team' in stage0:
            kind="Men's Team"
        if 'junior 1 individual' in stage0:
            kind="Junior's Individual"
        if 'junior 2 individual' in stage0:
            kind="Junior's Individual"
        
    elif match=='F':
        kind="Women's Individual"
    elif match=='M':
        kind="Men's Individual"
    elif match=='J':
        kind="Junior's Individual"
    elif match=='SF_s':
        kind="Women's Individual"
    elif match=='SM_s':
        kind="Men's Individual"
    elif match=='J_s':
        kind="Junior's Individual"
    
    if kind=='Unknown':
        category='Unknown'
        teams='Unknown'
    else:
        category=kind.split(' ')[0][:-2]
        teams=kind.split(' ')[1]
    if year<2014: 
        category=category.replace('Senior','Men')
    if year in [2018]: 
        category=category.replace('Senior','Men')
    return category,teams,phase

In [73]:
name_exceptions={'Atanasovski':'Atanasovski A. (MAC)',
                 'Dobrovicescu (SON)':'Dobrovicescu T. (SON)',
                 'Ianăș':'Ianăș F.',
                 'Arabadjiyski': 'Arabadjiyski A.',
                 'Mandia':'Mandia F.',
                 'Stanev':'Stanev A.',
                 'Mochalov':'Mochalov O.',
                 'Sozzi':'Sozzi A.',
                 'Crăciunel':'Crăciunel I.',
                 'Craciunel':'Crăciunel I.',
                 'Sagaev':'Sagaev L.',
                 'Buzás':'Buzás C.',
                 'Csala':'Csala D.',
                 'Dimitrov':'Dimitrov M.',
                 'Józsa':'Józsa L.',
                 'Creangă':'Creangă A.',
                 'Duțescu':'Duțescu M.',                 
                 'Furtună':'Furtună G.',
                 'Gârbea':'Gârbea I.',
                 'Stupu':'Stupu I.',
                 'Mahika-Voiconi':'Mahika-Voiconi S.',
                 'Mahika':'Mahika-Voiconi S.',
                 'Stanciu':'Stanciu F.',
                 'Vrânceanu':'Vrânceanu R.',
                 'Luca':'Luca M.',
                 'Wolfs':'Wolfs J.',
                 'Ducarme':'Ducarme A.',
                 'Sbârcea':'Sbârcea B.',
                 'Mocian':'Mocian A.',
                 'Hatvani':'Hatvani L.',
                 'Dusan':'Dusan N.',
                 'Borota':'Borota V.',
                 'Tsushima':'Tsushima K.',
                 'Tráser':'Tráser T.',
                 'Colțea':'Colțea A.',
                 'Brîcov':'Brîcov A.',
                 'Yamamoto':'Yamamoto M.',
                 'Crăciun':'Crăciun D.'}
redflags_names=['-','—','—',np.nan,'. ()','— ','- -.','- -. (-)',
                'Kashi','Sankon','București','Victorii:','Sakura','Taiken','Ikada','Sonkei','CRK','Museido',
                'Ichimon','Bushi Tokukai 1','Competitori – Shiai-sha','Echipa - roşu','Numele şi prenumele',
                'Victorii:','Victorii: 0','Victorii: 1','Victorii: 2','Victorii: 3','Victorii: 4',
                'Victorii: 5','?','Kyobukan','2/5','2/6','3/8','Finala','Kyobukan (0/0/0)','―',
                '(clasament final după meci de baraj)','CRK (Bucuresti)','Kaybukan','Isshin (Cluj)',
                'Ikada (Bucureşti)','Kyobukan (Braşov)','Puncte:','KASHI','Budoshin','Isshin',
                '— (—)','4. B.','4. Baraj: Stupu M - Hostina','4. Baraj: Moise KM - Korenschi M',
               'Bushi Tokukai (2/8/17)','CRK 2 (1/6/14)', 'CRK 2','CRK 1','Loc I.:',
               'Bushi Tokukai 2 (M Ciuc)','Echipa suport']
redflags_names2=['Bushi Tokukai','Eliminatoriu','finala','Finala','Fianala','Ikada','Ichimon','Pool',
                'Locul ','Lotul ','Loc ','Grupa ','Isshin','Meciul ','Victorii:']
name_equals={'Chirea M.':'Chirea A.',
            'Ghinet C.':'Ghineț C.',
            'Domnița M.':'Domniță M.',
            'Garbea I.':'Gârbea I.',
            'Horvát M.':'Horváth M.',
            'Ionita A.':'Ioniță A.',
            'Medvedschi I.':'Medvețchi I.',
            'Mahika S.':'Mahika-Voiconi S.',
            'Mate L.':'Máté L.',
            'Stupu I.':'Stupu A.',
            'Ah-Hu S.':'Ah-hu S.',
            'Alexa I.':'Alexa A.',
            'Angelescu M.':'Angelescu M.',
            'Apostu D.':'Apostu T.',
            'Brâcov A.':'Brîcov A.',
            'Catoriu D.':'Cantoriu D.',
            'Călina A.':'Călina C.',
            'Korenshi E.':'Korenschi E.',
            'Pleșa R.':'Pleșea R.',
            'Galos A.':'Galoș A.',
            'Győrfi G.':'Györfi G.',
            'Győrfi S.':'Györfi S.',
            'Hostina E.':'Hoștină E.', 
            'Hostină E.':'Hoștină E.', 
            'Ianăs F.':'Ianăș F.',
            'Lacatus M.':'Lăcătuș M.',
            'Máthé L.':'Máté L.',
            'Nastase M.':'Năstase E.',
            'Oprisan A.':'Oprișan A.',
            'Pârlea A.':'Pîrlea A.',
            'Sabau D.':'Sabău D.',
            'Spriu C.':'Spiru C.',
            'Stănculascu C.':'Stănculescu C.',
            'Vrânceanu M.': 'Vrânceanu L.',
            'Wasicek V.':'Wasicheck W.',
            'Wasicsec W.':'Wasicheck W.',
            'Wasicsek W.':'Wasicheck W.',
            'Zolfoghari A.':'Zolfaghari A.'}
letter_norm={'ţ':'ț','ş':'ș','Ş':'Ș'}
def name_cleaner(name):
    for letter in letter_norm:
        name=name.replace(letter,letter_norm[letter])
    if name not in name_exceptions:
        nc=name.replace('  ',' ').split('(')    
    else:
        nc=name_exceptions[name].split('(')
    rname=nc[0].strip()
    rnames=rname.split(' ')
    sname=rnames[0]+' '+rnames[1][0]+'.'
    if sname in name_equals:
        sname=name_equals[sname]
    return sname

In [74]:
def name_ok(name):
    if name==np.nan: return False
    if str(name)=='nan': return False
    if name not in redflags_names:
        if np.array([i not in name for i in redflags_names2]).all():
            return True
    return False

Standardize names


In [75]:
all_players={}
all_players_r={}
all_players_unsorted=set()
for year in matches:
    for competition in matches[year]:
        for match in matches[year][competition]:
            for color in ['aka','shiro']:
                name=match[color]['name']
                all_players_unsorted.add(name)
                if name_ok(name):
                    name=name_cleaner(name)
                    rname=match[color]['name']
                    if rname not in all_players_r:all_players_r[rname]=name
                    if name not in all_players: all_players[name]={}
                    if year not in all_players[name]:all_players[name][year]={'names':set()}
                    all_players[name][year]['names'].add(rname)

In [76]:
all_shinpan={}
all_shinpan_r={}
all_shinpan_unsorted=set()
for year in matches:
    for competition in matches[year]:
        for match in matches[year][competition]:
            if 'shinpan' in match:
                for color in ['fukushin1','shushin','fukushin2']:
                    aka=match['aka']['name']
                    shiro=match['shiro']['name']
                    if (name_ok(aka)) and\
                       (name_ok(shiro)) and\
                       (name_cleaner(aka) in all_players) and\
                       (name_cleaner(shiro) in all_players):
                        rname=match['shinpan'][color]
                        all_shinpan_unsorted.add(rname)
                        if name_ok(rname):
                            name=name_cleaner(rname)
                            if name not in all_shinpan: all_shinpan[name]=[] 
                            all_shinpan[name].append(match)
                            if rname not in all_shinpan_r:all_shinpan_r[rname]=name

Infer clubs


In [77]:
#naive infer
redflags_clubs=['','N/A','RO1','RO2']
club_equals={'MLD':'MOL',
             'IKD':'IKA',
             'BUL':'BUL/Bg',
             'TUR':'TUR/Tr',
             'MAC':'MAC/Mc',
             'MNE':'MNE/Mn',
             'SRB':'SRB/Sr',
             'ITA':'ITA/It',
             'ISS':'ISH',
             'Musso, Bg':'MUS/Bg',
             'Makoto, Sr':'MAK/Sr',
             'Szeged, Hu':'SZE/Hu'}
for name in all_players:
    for year in all_players[name]:
        for name_form in all_players[name][year]['names']:
            if '(' in name_form:
                club=name_form.split('(')[1].strip()[:-1]
                if club in club_equals: club=club_equals[club]
                if club not in redflags_clubs:
                    all_players[name][year]['club']=club

In [78]:
for name in all_players:
    for year in all_players[name]:
        if 'club' not in all_players[name][year]:
            #more than 1 year?
            years=np.sort(list(all_players[name].keys()))
            if len(years)>1:
                #get club from previous year
                for y in range(years[0],year):
                    if y in all_players[name]:
                        if 'club' in all_players[name][y]:
                            all_players[name][year]['club']=all_players[name][y]['club']
                #if still not found, get club from next year
                if 'club' not in all_players[name][year]:
                    #get club from next year
                    for y in np.arange(years[-1],year,-1):
                        if y in all_players[name]:
                            if 'club' in all_players[name][y]:
                                all_players[name][year]['club']=all_players[name][y]['club']

In [79]:
clubs_manual={'Balázs-Kercsó Z.':'BTK',
             'Vrânceanu M.':'SAN',
             'Duțescu M.':'IKA',
             'Crăciun D.':'SAM',
             'Nagy V.':'ISH',
             'Goró L.':'BTK',
             'Ghineț G.':'YUK',
             'Cioată E.':'KAS',
             'Leat M.':'IKA',
             'Perianu S.':'KNS',
             'Ah-hu S.':'CRK',
             'Preda A.':'CRK',
             'Luca M.':'IKA',
             'Salló Z.':'BTK',
             'András Z.':'BTK',
             'Bíró S.':'BTK',
             'Neagu F.':'IKA',
             'Bódi Z.':'KYO',
             'Bumbu D.':'ISH',
             'Botean A.':'ISH',
             'Moldoveanu M.':'ISH',
             'Jeszenszki T.':'BTK',
             'Ianăș F.':'SAM',
             'Suru N.':'SAM',
             'Balázs S.':'BTK',
             'Perdi L.':'ISH',
             'Vrânceanu L.':'SAN',
             'Oprișan A.':'IKA',
             'Horváth D.':'BTK',
             'Sandache I.':'BTK',
             'Crăciunel C.':'ICH',
             'Crăciunel V.':'ICH',
             'Crăciunel I.':'ICH',
             'Georgescu S.':'CRK',
             'Búzás C.':'BTK',
             'Moise T.':'KAY'}
for name in all_players:
    for year in all_players[name]:
        if 'club' not in all_players[name][year]:
            #if still not found, print error, infer other way
            if name in clubs_manual:
                all_players[name][year]['club']=clubs_manual[name]
            else:
                print('error',name,year,all_players[name])
                all_players[name][year]['club']='XXX'


error Cristea A. 2009 {2009: {'names': {'Cristea A.', 'Cristea Adrian'}}, 2014: {'names': {'Cristea Andrei'}}, 2015: {'names': {'Cristea Andrei'}}}
error Cristea A. 2014 {2009: {'names': {'Cristea A.', 'Cristea Adrian'}, 'club': 'XXX'}, 2014: {'names': {'Cristea Andrei'}}, 2015: {'names': {'Cristea Andrei'}}}
error Cristea A. 2015 {2009: {'names': {'Cristea A.', 'Cristea Adrian'}, 'club': 'XXX'}, 2014: {'names': {'Cristea Andrei'}, 'club': 'XXX'}, 2015: {'names': {'Cristea Andrei'}}}
error Alexandrescu N. 2009 {2009: {'names': {'Alexandrescu N.'}}, 2010: {'names': {'Alexandrescu N.', 'Alexandrescu Nic.'}}}
error Alexandrescu N. 2010 {2009: {'names': {'Alexandrescu N.'}, 'club': 'XXX'}, 2010: {'names': {'Alexandrescu N.', 'Alexandrescu Nic.'}}}
error Macavei A. 2009 {2009: {'names': {'Macavei Aurel'}}}
error Grossu D. 2009 {2009: {'names': {'Grossu D.', 'Grossu Dragos'}}}
error Rotaru V. 2009 {2009: {'names': {'Rotaru V.'}}}
error Macavei I. 2009 {2009: {'names': {'Macavei I.'}}}
error Mehelean L. 2009 {2009: {'names': {'Mehelean Ligia'}}}
error Dumbravă L. 2009 {2009: {'names': {'Dumbravă Lucian'}}}
error Xantopol C. 2009 {2009: {'names': {'Xantopol Claudiu'}}}
error Szabó S. 2009 {2009: {'names': {'Szabó Simon'}}, 2010: {'names': {'Szabó Simon'}}, 2011: {'names': {'Szabó Simon'}}, 2012: {'names': {'Szabó Simon'}}, 2013: {'names': {'Szabó Simon Dániel'}}, 2014: {'names': {'Szabó Simon'}}}
error Szabó S. 2010 {2009: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2010: {'names': {'Szabó Simon'}}, 2011: {'names': {'Szabó Simon'}}, 2012: {'names': {'Szabó Simon'}}, 2013: {'names': {'Szabó Simon Dániel'}}, 2014: {'names': {'Szabó Simon'}}}
error Szabó S. 2011 {2009: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2010: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2011: {'names': {'Szabó Simon'}}, 2012: {'names': {'Szabó Simon'}}, 2013: {'names': {'Szabó Simon Dániel'}}, 2014: {'names': {'Szabó Simon'}}}
error Szabó S. 2012 {2009: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2010: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2011: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2012: {'names': {'Szabó Simon'}}, 2013: {'names': {'Szabó Simon Dániel'}}, 2014: {'names': {'Szabó Simon'}}}
error Szabó S. 2013 {2009: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2010: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2011: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2012: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2013: {'names': {'Szabó Simon Dániel'}}, 2014: {'names': {'Szabó Simon'}}}
error Szabó S. 2014 {2009: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2010: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2011: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2012: {'names': {'Szabó Simon'}, 'club': 'XXX'}, 2013: {'names': {'Szabó Simon Dániel'}, 'club': 'XXX'}, 2014: {'names': {'Szabó Simon'}}}
error Pienaru S. 2009 {2009: {'names': {'Pienaru Sorin'}}, 2010: {'names': {'Pienaru Sorin'}}}
error Pienaru S. 2010 {2009: {'names': {'Pienaru Sorin'}, 'club': 'XXX'}, 2010: {'names': {'Pienaru Sorin'}}}
error Purdel C. 2010 {2010: {'names': {'Purdel Cristian'}}, 2011: {'names': {'Purdel Cristian', 'Purdel C.'}}}
error Purdel C. 2011 {2010: {'names': {'Purdel Cristian'}, 'club': 'XXX'}, 2011: {'names': {'Purdel Cristian', 'Purdel C.'}}}
error Iordan R. 2010 {2010: {'names': {'Iordan Relu'}}}
error Constantinescu A. 2010 {2010: {'names': {'Constantinescu Andrei'}}}
error Nechifor C. 2010 {2010: {'names': {'Nechifor Cristian'}}}
error Chiric D. 2010 {2010: {'names': {'Chiric Dragoș'}}}
error Zinculescu T. 2010 {2010: {'names': {'Zinculescu Tiberiu'}}}
error Cerneavschi A. 2010 {2010: {'names': {'Cerneavschi Adrian'}}}
error Cerchez I. 2010 {2010: {'names': {'Cerchez Iuliana'}}, 2011: {'names': {'Cerchez Iuliana', 'Cerchez I.'}}}
error Cerchez I. 2011 {2010: {'names': {'Cerchez Iuliana'}, 'club': 'XXX'}, 2011: {'names': {'Cerchez Iuliana', 'Cerchez I.'}}}
error Ștefan C. 2010 {2010: {'names': {'Ștefan Cristina', 'Ştefan Cristina'}}, 2011: {'names': {'Ştefan C.', 'Ştefan Cristina'}}, 2012: {'names': {'Ştefan Cristina'}}, 2013: {'names': {'Ștefan Cristina'}}}
error Ștefan C. 2011 {2010: {'names': {'Ștefan Cristina', 'Ştefan Cristina'}, 'club': 'XXX'}, 2011: {'names': {'Ştefan C.', 'Ştefan Cristina'}}, 2012: {'names': {'Ştefan Cristina'}}, 2013: {'names': {'Ștefan Cristina'}}}
error Ștefan C. 2012 {2010: {'names': {'Ștefan Cristina', 'Ştefan Cristina'}, 'club': 'XXX'}, 2011: {'names': {'Ştefan C.', 'Ştefan Cristina'}, 'club': 'XXX'}, 2012: {'names': {'Ştefan Cristina'}}, 2013: {'names': {'Ștefan Cristina'}}}
error Ștefan C. 2013 {2010: {'names': {'Ștefan Cristina', 'Ştefan Cristina'}, 'club': 'XXX'}, 2011: {'names': {'Ştefan C.', 'Ştefan Cristina'}, 'club': 'XXX'}, 2012: {'names': {'Ştefan Cristina'}, 'club': 'XXX'}, 2013: {'names': {'Ștefan Cristina'}}}
error Mihai I. 2010 {2010: {'names': {'Mihai Ioana'}}, 2011: {'names': {'Mihai Ioana'}}, 2012: {'names': {'Mihai Ioana'}}, 2013: {'names': {'Mihai Ioana'}}}
error Mihai I. 2011 {2010: {'names': {'Mihai Ioana'}, 'club': 'XXX'}, 2011: {'names': {'Mihai Ioana'}}, 2012: {'names': {'Mihai Ioana'}}, 2013: {'names': {'Mihai Ioana'}}}
error Mihai I. 2012 {2010: {'names': {'Mihai Ioana'}, 'club': 'XXX'}, 2011: {'names': {'Mihai Ioana'}, 'club': 'XXX'}, 2012: {'names': {'Mihai Ioana'}}, 2013: {'names': {'Mihai Ioana'}}}
error Mihai I. 2013 {2010: {'names': {'Mihai Ioana'}, 'club': 'XXX'}, 2011: {'names': {'Mihai Ioana'}, 'club': 'XXX'}, 2012: {'names': {'Mihai Ioana'}, 'club': 'XXX'}, 2013: {'names': {'Mihai Ioana'}}}
error Blaj V. 2010 {2010: {'names': {'Blaj Valentin'}}, 2011: {'names': {'Blaj Valentin'}}}
error Blaj V. 2011 {2010: {'names': {'Blaj Valentin'}, 'club': 'XXX'}, 2011: {'names': {'Blaj Valentin'}}}
error Zolfaghari A. 2010 {2010: {'names': {'Zolfoghari Anahita', 'Zolfaghari Anah.'}}}
error Ionescu I. 2010 {2010: {'names': {'Ionescu Ilinca'}}, 2014: {'names': {'Ionescu Ilinca'}}}
error Ionescu I. 2014 {2010: {'names': {'Ionescu Ilinca'}, 'club': 'XXX'}, 2014: {'names': {'Ionescu Ilinca'}}}
error Crișan E. 2010 {2010: {'names': {'Crişan Eugen', 'Crișan Eugen'}}, 2011: {'names': {'Crişan Eugen', 'Crișan E.', 'Crişan E.'}}, 2012: {'names': {'Crişan Eugen'}}}
error Crișan E. 2011 {2010: {'names': {'Crişan Eugen', 'Crișan Eugen'}, 'club': 'XXX'}, 2011: {'names': {'Crişan Eugen', 'Crișan E.', 'Crişan E.'}}, 2012: {'names': {'Crişan Eugen'}}}
error Crișan E. 2012 {2010: {'names': {'Crişan Eugen', 'Crișan Eugen'}, 'club': 'XXX'}, 2011: {'names': {'Crişan Eugen', 'Crișan E.', 'Crişan E.'}, 'club': 'XXX'}, 2012: {'names': {'Crişan Eugen'}}}
error Zainea I. 2010 {2010: {'names': {'Zainea Ionuț', 'Zainea Ionuţ'}}}
error Stănculescu C. 2010 {2010: {'names': {'Stănculescu Cristian', 'Stănculescu Cri.'}}, 2013: {'names': {'Stănculascu Cristian'}}}
error Stănculescu C. 2013 {2010: {'names': {'Stănculescu Cristian', 'Stănculescu Cri.'}, 'club': 'XXX'}, 2013: {'names': {'Stănculascu Cristian'}}}
error Gheorghiu R. 2010 {2010: {'names': {'Gheorghiu Radu'}}, 2011: {'names': {'Gheorghiu R.', 'Gheorghiu Radu'}}, 2012: {'names': {'Gheorghiu Radu'}}, 2013: {'names': {'Gheorghiu Radu'}}}
error Gheorghiu R. 2011 {2010: {'names': {'Gheorghiu Radu'}, 'club': 'XXX'}, 2011: {'names': {'Gheorghiu R.', 'Gheorghiu Radu'}}, 2012: {'names': {'Gheorghiu Radu'}}, 2013: {'names': {'Gheorghiu Radu'}}}
error Gheorghiu R. 2012 {2010: {'names': {'Gheorghiu Radu'}, 'club': 'XXX'}, 2011: {'names': {'Gheorghiu R.', 'Gheorghiu Radu'}, 'club': 'XXX'}, 2012: {'names': {'Gheorghiu Radu'}}, 2013: {'names': {'Gheorghiu Radu'}}}
error Gheorghiu R. 2013 {2010: {'names': {'Gheorghiu Radu'}, 'club': 'XXX'}, 2011: {'names': {'Gheorghiu R.', 'Gheorghiu Radu'}, 'club': 'XXX'}, 2012: {'names': {'Gheorghiu Radu'}, 'club': 'XXX'}, 2013: {'names': {'Gheorghiu Radu'}}}
error Andrei O. 2010 {2010: {'names': {'Andrei Ovidiu'}}}
error Bărbulescu E. 2010 {2010: {'names': {'Bărbulescu Eduard', 'Bărbulescu Ed.'}}}
error Turdean S. 2010 {2010: {'names': {'Turdean Sergiu'}}, 2011: {'names': {'Turdean Sergiu', 'Turdean S.'}}}
error Turdean S. 2011 {2010: {'names': {'Turdean Sergiu'}, 'club': 'XXX'}, 2011: {'names': {'Turdean Sergiu', 'Turdean S.'}}}
error Georgescu B. 2010 {2010: {'names': {'Georgescu Bogdan', 'Georgescu B.'}}}
error Georgescu R. 2010 {2010: {'names': {'Georgescu Radu'}}, 2011: {'names': {'Georgescu R.'}}}
error Georgescu R. 2011 {2010: {'names': {'Georgescu Radu'}, 'club': 'XXX'}, 2011: {'names': {'Georgescu R.'}}}
error Wasicheck W. 2010 {2010: {'names': {'Wasicek Verner'}}, 2011: {'names': {'Wasicsek W.', 'Wasicsek Werner'}}, 2012: {'names': {'Wasicsec Werner', 'Wasicheck Werner'}}, 2013: {'names': {'Wasicsek Werner'}}}
error Wasicheck W. 2011 {2010: {'names': {'Wasicek Verner'}, 'club': 'XXX'}, 2011: {'names': {'Wasicsek W.', 'Wasicsek Werner'}}, 2012: {'names': {'Wasicsec Werner', 'Wasicheck Werner'}}, 2013: {'names': {'Wasicsek Werner'}}}
error Wasicheck W. 2012 {2010: {'names': {'Wasicek Verner'}, 'club': 'XXX'}, 2011: {'names': {'Wasicsek W.', 'Wasicsek Werner'}, 'club': 'XXX'}, 2012: {'names': {'Wasicsec Werner', 'Wasicheck Werner'}}, 2013: {'names': {'Wasicsek Werner'}}}
error Wasicheck W. 2013 {2010: {'names': {'Wasicek Verner'}, 'club': 'XXX'}, 2011: {'names': {'Wasicsek W.', 'Wasicsek Werner'}, 'club': 'XXX'}, 2012: {'names': {'Wasicsec Werner', 'Wasicheck Werner'}, 'club': 'XXX'}, 2013: {'names': {'Wasicsek Werner'}}}
error Martin M. 2010 {2010: {'names': {'Martin Mihai'}}}
error Moga D. 2011 {2011: {'names': {'Moga D', 'Moga Dorin'}}, 2013: {'names': {'Moga Dorin'}}}
error Moga D. 2013 {2011: {'names': {'Moga D', 'Moga Dorin'}, 'club': 'XXX'}, 2013: {'names': {'Moga Dorin'}}}
error Constantina A. 2011 {2011: {'names': {'Constantina Alexandru', 'Constantina A.'}}}
error Keresztes M. 2011 {2011: {'names': {'Keresztes Mátyás', 'Keresztes M.'}}}
error Galoș A. 2011 {2011: {'names': {'Galoş Alin', 'Galos A.'}}}
error Albert V. 2011 {2011: {'names': {'Albert V.'}}}
error Lăcătuș M. 2011 {2011: {'names': {'Lacatus Mihai'}}, 2012: {'names': {'Lăcătuş Mihai'}}}
error Lăcătuș M. 2012 {2011: {'names': {'Lacatus Mihai'}, 'club': 'XXX'}, 2012: {'names': {'Lăcătuş Mihai'}}}
error Pavel A. 2012 {2012: {'names': {'Pavel Alexandra'}}, 2013: {'names': {'Pavel Alexandra'}}}
error Pavel A. 2013 {2012: {'names': {'Pavel Alexandra'}, 'club': 'XXX'}, 2013: {'names': {'Pavel Alexandra'}}}
error Parlea A. 2012 {2012: {'names': {'Parlea Andrei'}}}
error Matea C. 2012 {2012: {'names': {'Matea Coralia Ioana'}}, 2013: {'names': {'Matea Coralia Ioana', 'Matea Coralia', 'Matea Coralia-Ioana'}}, 2014: {'names': {'Matea Coralia'}}, 2015: {'names': {'Matea Coralia-Ioana'}}}
error Matea C. 2013 {2012: {'names': {'Matea Coralia Ioana'}, 'club': 'XXX'}, 2013: {'names': {'Matea Coralia Ioana', 'Matea Coralia', 'Matea Coralia-Ioana'}}, 2014: {'names': {'Matea Coralia'}}, 2015: {'names': {'Matea Coralia-Ioana'}}}
error Matea C. 2014 {2012: {'names': {'Matea Coralia Ioana'}, 'club': 'XXX'}, 2013: {'names': {'Matea Coralia Ioana', 'Matea Coralia', 'Matea Coralia-Ioana'}, 'club': 'XXX'}, 2014: {'names': {'Matea Coralia'}}, 2015: {'names': {'Matea Coralia-Ioana'}}}
error Matea C. 2015 {2012: {'names': {'Matea Coralia Ioana'}, 'club': 'XXX'}, 2013: {'names': {'Matea Coralia Ioana', 'Matea Coralia', 'Matea Coralia-Ioana'}, 'club': 'XXX'}, 2014: {'names': {'Matea Coralia'}, 'club': 'XXX'}, 2015: {'names': {'Matea Coralia-Ioana'}}}
error Macaveiu I. 2012 {2012: {'names': {'Macaveiu Ioan Aurel'}}}
error Crețiu M. 2012 {2012: {'names': {'Crețiu Matei'}}}
error Crețiu T. 2012 {2012: {'names': {'Crețiu Tudor'}}}
error Sânpetru R. 2012 {2012: {'names': {'Sânpetru Raul'}}}
error Moldoveanu A. 2012 {2012: {'names': {'Moldoveanu Alexandru', 'Moldoveanu Alex.'}}}
error Szikszai M. 2012 {2012: {'names': {'Szikszai Mihály'}}}
error Chiper I. 2013 {2013: {'names': {'Chiper Ioan'}}}
error Mitelea C. 2013 {2013: {'names': {'Mitelea Călin'}}, 2014: {'names': {'Mitelea Călin'}}}
error Mitelea C. 2014 {2013: {'names': {'Mitelea Călin'}, 'club': 'XXX'}, 2014: {'names': {'Mitelea Călin'}}}
error Cosma F. 2013 {2013: {'names': {'Cosma Florin'}}}
error Tiron L. 2013 {2013: {'names': {'Tiron Laurenţiu'}}}
error Pîrlea A. 2013 {2013: {'names': {'Pîrlea Andrei Daniel', 'Pârlea Andrei'}}}
error Ungureanu A. 2013 {2013: {'names': {'Ungureanu Alexandru Cătălin'}}}
error Cozan O. 2013 {2013: {'names': {'Cozan  Ovidiu', 'Cozan Ovidiu'}}}
error Canceu A. 2014 {2014: {'names': {'Canceu Adriana', 'Canceu Ana', 'Canceu Adriana-Maria'}}, 2015: {'names': {'Canceu Adriana-Maria'}}}
error Canceu A. 2015 {2014: {'names': {'Canceu Adriana', 'Canceu Ana', 'Canceu Adriana-Maria'}, 'club': 'XXX'}, 2015: {'names': {'Canceu Adriana-Maria'}}}
error Sîvu F. 2014 {2014: {'names': {'Sîvu Francesca'}}}
error Duicu T. 2014 {2014: {'names': {'Duicu Tudor'}}}
error Ceoca A. 2014 {2014: {'names': {'Ceoca Alexandra'}}}
error Forrer Y. 2014 {2014: {'names': {'Forrer Yasmine'}}}
error Fazakas K. 2014 {2014: {'names': {'Fazakas Krisztina'}}}
error Surugiu T. 2014 {2014: {'names': {'Surugiu Tudor Mihai'}}, 2015: {'names': {'Surugiu Tudor Mihai'}}}
error Surugiu T. 2015 {2014: {'names': {'Surugiu Tudor Mihai'}, 'club': 'XXX'}, 2015: {'names': {'Surugiu Tudor Mihai'}}}
error Ferenczi C. 2014 {2014: {'names': {'Ferenczi Cristian'}}}
error Moroșan A. 2014 {2014: {'names': {'Moroșan Andrei', 'Moroşan Andrei'}}, 2015: {'names': {'Moroşan Andrei'}}}
error Moroșan A. 2015 {2014: {'names': {'Moroșan Andrei', 'Moroşan Andrei'}, 'club': 'XXX'}, 2015: {'names': {'Moroşan Andrei'}}}
error Jianu A. 2014 {2014: {'names': {'Jianu Alexandru'}}}
error Tamang S. 2014 {2014: {'names': {'Tamang Sujata'}}, 2015: {'names': {'Tamang Sujata'}}}
error Tamang S. 2015 {2014: {'names': {'Tamang Sujata'}, 'club': 'XXX'}, 2015: {'names': {'Tamang Sujata'}}}
error Bonta T. 2014 {2014: {'names': {'Bonta Tudor'}}}
error Cojoleanca I. 2014 {2014: {'names': {'Cojoleanca Iulian'}}}
error Angelescu M. 2014 {2014: {'names': {'Angelescu Marcel'}}}
error Hassan D. 2014 {2014: {'names': {'Hassan Dariush'}}}
error Costea C. 2015 {2015: {'names': {'Costea Cătălin'}}}
error Constantin D. 2015 {2015: {'names': {'Constantin Doru'}}}
error Molnár E. 2015 {2015: {'names': {'Molnár Emese'}}}
error Trandafir A. 2015 {2015: {'names': {'Trandafir Ana'}}}
error Cismaru D. 2015 {2015: {'names': {'Cismaru Daniela'}}}
error Tütsek K. 2015 {2015: {'names': {'Tütsek Kinga'}}}
error Darkó B. 2015 {2015: {'names': {'Darkó Béla'}}}

In [80]:
clubs={}
for name in all_players:
    for year in all_players[name]:
        club=all_players[name][year]['club'] 
        if club not in clubs:clubs[club]={}
        if year not in clubs[club]:clubs[club][year]=set()
        clubs[club][year].add(name)

In [81]:
def outcome_cleaner(outcome):
    if outcome=='E': return True
    else: return False

In [82]:
def outcome_from_points(aka,shiro):
    if aka==shiro: return 'X',0
    elif aka>shiro: return 'A',str(aka-shiro)
    else: return 'S',str(shiro-aka)

In [83]:
redflags_points=['Puncte']
def point_clean1(point):
    return point.replace('○','O').replace('I','H').replace('×','')\
            .replace('–','').replace('1','O').replace('—','').replace('?','')
def points_cleaner(points):
    hansoku=0
    if '∆' in points:
        hansoku=1
        points=points.replace('∆','')
    if '▲' in points:
        hansoku=1
        points=points.replace('▲','')
    if '(Ht)' in points:
        hansoku=1
        points=points.replace('(Ht)','')
    if '(victorie)' in points:
        points=points.replace('(victorie)','OO')
    points=points.strip()
    if len(points)>2:
        print(points,'error')
    elif len(points)>1:
        point1=points[0]
        point2=points[1]
    elif len(points)>0:
        point1=points[0]
        point2=''
    else:
        point1=''
        point2=''
    point1=point_clean1(point1)
    point2=point_clean1(point2)
    return point1,point2,len(points),hansoku

In [84]:
def club_cleaner(club):
    if '/' in club:
        return club.split('/')[0],club.split('/')[1].upper()
    else:
        return club,'RO'

In [85]:
pretty_clubs={'ARA':'Arashi', 'BSD':'Bushido', 'BTK':'Bushi Tokukai', 'BG':'Bulgaria',
              'CDO':'Coroan de Oțel', 'CRK':'Clubul Român de Kendo', 'HAR':'Hargita', 
              'ICH':'Ichimon', 'IKA':'Ikada','ISH':'Ishhin', 'IT':'Italy','HU':'Hungary',
              'KAS':'Kashi', 'KNS':'Kenshin', 'KYO':'Kyobukan', 'MC':'Macedonia',
              'SR':'Serbia', 'MN':'Montenegro', 'MOL':'Moldova', 'MUS':'Museido', 
               'RON':'Ronin-do', 'SAK':'Sakura', 'SAM':'Sam-sho','SAN':'Sankon', 'SBK':'Sobukan',
               'SON':'Sonkei', 'SR':'Serbia', 'TAI':'Taiken', 'TR':'Turkey', 'XXX':'Unknown',
               'YUK':'Yu-kai','KAY':'Kaybukan'}
def pretty_club(club, country):
    if country!='RO':
        return pretty_clubs[country]
    else: return pretty_clubs[club]

In [86]:
master_matches=[]
for year in matches:
    for competition in matches[year]:
        print(year,competition)
        for k in matches[year][competition]:
            good=True
            match={'year':year,'competition':competition}
            match['match_category'],match['match_teams'],match['match_phase']=match_cleaner(year,k['match_type'])
            if 'shinpan' in k:
                for color in ['fukushin1','shushin','fukushin2']:
                    if color in k['shinpan']:
                        if k['shinpan'][color] in all_shinpan_r:
                            match[color]=all_shinpan_r[k['shinpan'][color]]
            aka=k['aka']['name']
            shiro=k['shiro']['name']
            if (name_ok(aka)) and\
               (name_ok(shiro)) and\
               (name_cleaner(aka) in all_players) and\
               (name_cleaner(shiro) in all_players):
                for a in ['aka','shiro']:
                    points=''
                    for h in k[a]:
                        if h=='name':
                            name=k[a][h]
                            match[a+' name']=all_players_r[name]
                            club, country=club_cleaner(all_players[match[a+' name']][year]['club'])
                            match[a+' club'], match[a+' country']=club, country
                            match[a+' pretty_club']=pretty_club(club, country)
                        else:
                            point=k[a][h]
                            if str(point)=='nan': point=''
                            points=points+point
                    for redflag in redflags_points:
                        if redflag in points:
                            good=False
                    if good:
                        match[a+' point1'],match[a+' point2'],match[a+' points'],match[a+' hansoku']=points_cleaner(points)
            else:
                good=False                
            if good:
                if 'outcome' in k:
                    match['encho']=outcome_cleaner(k['outcome'])
                else: 
                    match['encho']=False
                match['winner'],match['difference']=outcome_from_points(match['aka points'],match['shiro points'])

                master_matches.append(match)


2009 CN
2009 CR
2010 CR
2010 CN
2011 CN
2011 CR
2012 CN
2012 CR
2013 CN
2013 CR
2013 SL
2014 SL
2014 CR
2014 CN
2015 SL
2015 CN
2015 CR
2016 SL
2016 CN
2016 CR
2017 CN
2017 CR
2017 SL
2018 CR
2018 SL

In [87]:
data=pd.DataFrame(master_matches)

Cleanup


In [91]:
data['aka hansoku']=data['aka hansoku'].replace(0,'').replace(1,'Δ')
data['shiro hansoku']=data['shiro hansoku'].replace(0,'').replace(1,'Δ')

In [92]:
data.to_csv('data/matches.csv')

Group by player


In [93]:
aka=data[[i for i in data.columns if 'shiro ' not in i]]
aka.columns=[i.replace('aka ','') for i in aka.columns]
aka['color']='aka'
aka['opponent']=data['shiro name']


C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\ipykernel_launcher.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.

In [94]:
shiro=data[[i for i in data.columns if 'aka ' not in i]]
shiro.columns=[i.replace('shiro ','') for i in shiro.columns]
shiro['color']='shiro'
shiro['opponent']=data['aka name']


C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\ipykernel_launcher.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.

In [95]:
extended_matches=pd.concat([aka,shiro],axis=0).reset_index(drop=True)


C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  """Entry point for launching an IPython kernel.

In [96]:
extended_matches.head()


Out[96]:
club color competition country difference encho fukushin1 fukushin2 hansoku match_category ... match_teams name opponent point1 point2 points pretty_club shushin winner year
0 CRK aka CN RO 1 False NaN NaN Unknown ... Unknown Chirea V. Benedek L. M 1 Clubul Român de Kendo NaN A 2009
1 CRK aka CN RO 2 False NaN NaN Unknown ... Unknown Chirea V. Chirea A. 0 Clubul Român de Kendo NaN S 2009
2 CRK aka CN RO 2 False NaN NaN Unknown ... Unknown Chirea V. Illyés A. 0 Clubul Român de Kendo NaN S 2009
3 BTK aka CN RO 1 False NaN NaN Unknown ... Unknown Benedek L. Chirea A. K K 2 Bushi Tokukai NaN A 2009
4 BTK aka CN RO 1 False NaN NaN Unknown ... Unknown Benedek L. Illyés A. 0 Bushi Tokukai NaN S 2009

5 rows × 21 columns


In [97]:
extended_matches.to_csv('data/extended_matches.csv')

In [98]:
p1=extended_matches[[i for i in extended_matches.columns if i!='point2']]
p2=extended_matches[[i for i in extended_matches.columns if i!='point1']]
p1.rename(columns={'point1':'point'}, inplace=True)
p2.rename(columns={'point2':'point'}, inplace=True)


C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\pandas\core\frame.py:3781: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)

In [99]:
extended_points=pd.concat([p1,p2],axis=0).reset_index(drop=True)

In [100]:
extended_points.to_csv('data/extended_points.csv')

In [101]:
extended_points.columns


Out[101]:
Index(['club', 'color', 'competition', 'country', 'difference', 'encho',
       'fukushin1', 'fukushin2', 'hansoku', 'match_category', 'match_phase',
       'match_teams', 'name', 'opponent', 'point', 'points', 'pretty_club',
       'shushin', 'winner', 'year'],
      dtype='object')

In [102]:
shu=extended_points[[i for i in extended_points.columns if 'fukushin' not in i]]
shu.columns=[i.replace('shushin','shinpan') for i in shu.columns]
fk1=extended_points[[i for i in extended_points.columns if 'shushin' not in i and 'fukushin2' not in i]]
fk1.columns=[i.replace('fukushin1','shinpan') for i in fk1.columns]
fk2=extended_points[[i for i in extended_points.columns if 'shushin' not in i and 'fukushin1' not in i]]
fk2.columns=[i.replace('fukushin2','shinpan') for i in fk2.columns]
extended_shinpan=pd.concat([shu,fk1,fk2],axis=0).reset_index(drop=True)


C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\ipykernel_launcher.py:7: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  import sys

In [103]:
extended_shinpan.to_csv('data/extended_shinpan.csv')

In [104]:
extended_shinpan.columns


Out[104]:
Index(['club', 'color', 'competition', 'country', 'difference', 'encho',
       'hansoku', 'match_category', 'match_phase', 'match_teams', 'name',
       'opponent', 'point', 'points', 'pretty_club', 'shinpan', 'winner',
       'year'],
      dtype='object')

Competitor statistics


In [105]:
competitors={}
for i in data.T.iteritems():
    for a in ['aka ','shiro ']:
        name=i[1][a+'name']
        club=i[1][a+'club']
        if name not in competitors:
            competitors[name]={'U':0,'club':club}
        for j in ['point1','point2']:
            point=i[1][a+j] 
            if point!='':
                if point not in competitors[name]:competitors[name][point]=0
                competitors[name][point]+=1
        competitors[name]['U']+=1

In [106]:
data2=pd.DataFrame(competitors)

In [107]:
data2.T.to_csv('data/competitors.csv')

In [ ]:


In [ ]: